#### Ponderador MatchDf
library(survey)
library(svrep)
library(tidyverse)

pb2022mt <- pb2022mt %>% mutate(educ_r = factor(case_when(
  educ_w01 >= 1 & educ_w01 <= 5 | educ_w01 == 99 ~ 1,
  educ_w01 == 6  ~ 2,
  educ_w01 == 7  ~ 3,
  educ_w01 == 8  ~ 4,
  educ_w01 == 9 | educ_w01 == 10 ~ 5
), labels = c("Escolar","Educacion Tecnica Superior Incompleta","Educacion Tecnica Superior Completa",
              "Profesional Incompleto","Profesional y Postgrado")),
edad_r = factor(case_when(
  attribute_235_w01 %in% c(18:29) ~ 1,
  attribute_235_w01 %in% c(30:39) ~ 2,
  attribute_235_w01 %in% c(40:49) ~ 3,
  attribute_235_w01 %in% c(50:65) ~ 4
),
labels = c("18-29","30-39","40-49", "50-65")
))

pb2022mt$sec <- ifelse(pb2022mt$educaF_w01 == "Secondary Ed.",1,0)
pb2022mt$ter_tec <- ifelse(pb2022mt$educaF_w01 == "Tertiary Tech.",1,0)
pb2022mt$ter_univ <- ifelse(pb2022mt$educaF_w01 == "Tertiary Univ",1,0)



pb2022mt.svy <- svydesign(ids = ~1, data = pb2022mt)

# Porcentajes en CASEN

edad.dist <- data.frame(edad_r = c("18-29","30-39","40-49", "50-65"),
                        Freq = nrow(pb2022mt) *  c(0.29, 0.21, 0.19, 0.31))
educ.dist <- data.frame(educ_r = c("Escolar","Educacion Tecnica Superior Incompleta","Educacion Tecnica Superior Completa",
                                   "Profesional Incompleto","Profesional y Postgrado"),
                        Freq = nrow(pb2022mt) * c(0.58,0.04,0.09,0.11,0.18))

k <- NULL
l <- list()

for (k in 2:20){
  # The weight is created
  
  pb2022mt.svy <- svydesign(ids = ~1, data = pb2022mt)
  pb2022mt.2.svy.rake <- rake(design = pb2022mt.svy, sample.margins = list(~educ_r,~edad_r),
                              population.margins = list(educ.dist,edad.dist))
  
  # A df with the values of education is created
  
  df <- as.data.frame(prop.table(table(pb2022mt.2.svy.rake$variables["educaF_w01"])))
  colnames(df)[2] <- "Unweighted Mean"
  df <- merge(df,as.data.frame(prop.table(svytable(~educaF_w01, design=pb2022mt.2.svy.rake))),
              by="educaF_w01")
  colnames(df)[3] <- "Pre-Trim Mean"
  df <- cbind(df,Population = c(.58,.13,.29))
  df$mediana <- median(weights(pb2022mt.2.svy.rake))
  df$iqr <- IQR(weights(pb2022mt.2.svy.rake))
  max_pre_trim <- max(weights(pb2022mt.2.svy.rake))
  
  #The weight is trimmed
  
  pb2022mt.2.svy.rake <- trimWeights(pb2022mt.2.svy.rake,
                                    lower=1/(median(weights(pb2022mt.2.svy.rake))+k*IQR(weights(pb2022mt.2.svy.rake))), 
                                    upper=median(weights(pb2022mt.2.svy.rake))+k*IQR(weights(pb2022mt.2.svy.rake)))
  df <- cbind(df,as.data.frame(prop.table(svytable(~educaF_w01, design=pb2022mt.2.svy.rake)))[2])
  colnames(df)[which(colnames(df)=="Freq")] <- "Weighted Mean" # This is the trimmed weight
  
  df$Variance <- 0
  df$Variance[1] <- svyvar(~sec,design=pb2022mt.2.svy.rake,na.rm=T)
  df$Variance[2] <- svyvar(~ter_tec,design=pb2022mt.2.svy.rake,na.rm=T)
  df$Variance[3] <- svyvar(~ter_univ,design=pb2022mt.2.svy.rake,na.rm=T)
  
  df$mse <- (df$`Weighted Mean`- df$Population)^2+df$Variance
  df$max_pre_trim <- max_pre_trim
  df$max_post_trim <- max(weights(pb2022mt.2.svy.rake))
  
  # A variable that informs the relative bias and the relative variance is created (Potter & Zheng, 2015, pg.2713)
  df$rel_bias <- 100*(df$Population-df$`Weighted Mean`)/df$Population
  df$rel_var <- 100*(df$`Weighted Mean`-df$`Pre-Trim Mean`)/df$`Pre-Trim Mean`
  
  df$k <- k
  
  # df is stored in a list  
  l[[k]] <- df
}

df2 <- do.call("rbind", l)
df2 <- df2 %>% arrange(educaF_w01) %>% group_by(educaF_w01) %>% mutate(delta_mse = mse-dplyr::lag(mse,n=1))

mse_plot22mt <- ggplot(data=filter(df2,educaF_w01!="Tertiary Tech."),aes(k, mse, color=educaF_w01)) + 
  geom_line() + geom_point() + labs(x="K",y="MSE",color="Educational Level",
                                    title="MSE Reduction by each additional K") + theme_bw()

delta_plot22mt <- ggplot(data=filter(df2,educaF_w01!="Tertiary Tech."), aes(k, delta_mse, color=educaF_w01)) + 
  geom_line() + geom_point() + labs(x="K",y="Delta MSE",color="Educational Level",
                                    title="Delta MSE Reduction by each additional K") + theme_bw()


#Weight trimming
k <- 9 # Antes el k = 6

pb2022mt.svy <- svydesign(ids = ~1, data = pb2022mt)

pb2022mt.2.svy.rake <- rake(design = pb2022mt.svy, sample.margins = list(~educ_r,~edad_r),
                            population.margins = list(educ.dist,edad.dist))

pb2022mt.2.svy.rake <- trimWeights(pb2022mt.2.svy.rake, lower=1/(median(weights(pb2022mt.2.svy.rake))+k*IQR(weights(pb2022mt.2.svy.rake))),
                                   upper=(median(weights(pb2022mt.2.svy.rake))+k*IQR(weights(pb2022mt.2.svy.rake))))

pb2022mt.w <- as_data_frame_with_weights(pb2022mt.2.svy.rake,full_wgt_name = "weight.match.educ.edad",
                                    rep_wgt_prefix = "REP_WGT_")

summary(pb2022mt.w$weight.match.educ.edad)
#prop.table(wtd.table(pb2022mt.w$educaF_w01, weights = pb2022mt.w$weight.match.educ.edad))
